
************************************************
*** Section 1: Instrument Setup
************************************************
* Start from an empty session and load the dyadic gravity data.
clear all
set mem 900m
set more off
cd "C:\Users\Stephen Chaudoin\Desktop\Civil War Replication\"
use gravitydata.dta, clear
* Keep 1959 onward (rows whose year is missing are retained as well).
keep if year>=1959

* Modified contiguity dummy built from conttype:
*   0 when conttype is 0 or 5, 1 when conttype is 1 through 4, missing otherwise.
gen cowcontig_f = cond(conttype == 0 | conttype == 5, 0, cond(inrange(conttype, 1, 4), 1, .))


*** Our MX data are in millions of dollars
* US Canada in 2009 are $495,000,000,000 (state department)
* US X Canada in 2009 are $247,000,000,000
* Ray value: 433104... *1,000,000
* Need to multiply by 1,000,000

* Rescale imports (M) and exports (X) by 1,000,000. Where the primary series
* is missing, fall back on the alternative series (impab / expab), rescaled
* the same way.
replace M = M*1000000
replace M = impab*1000000 if impab!=. & M==.
replace X = X*1000000
replace X = expab*1000000 if expab!=. & X==.

* MX_ijt: total dyadic trade (imports + exports)
gen MX = M + X

* Deflate to 1967 dollars, since that is what GDP is deflated to
* MX_ijt
gen MX_67 = MX/uscpi67
* X_ijt
gen X_67 = X/uscpi67
* M_ijt
gen M_67 = M/uscpi67


* Total M+X, contiguous and noncontiguous
*bysort ccode1 year: egen totalIMEX = total(MX)
*bysort ccode1 year: egen totalIMEX_nocontig_xx = total(MX) if cowcontig_f == 0
*bysort ccode1 year: egen totalIMEX_nocontig = min(totalIMEX_nocontig_xx)
* Total X, contiguous and noncontiguous
*bysort ccode1 year: egen totalEX = total(X)
*bysort ccode1 year: egen totalEX_nocontig_xx = total(X) if cowcontig_f == 0
*bysort ccode1 year: egen totalEX_nocontig = min(totalEX_nocontig_xx)


***
* Things "to be weighted"
***

*** loggdp_jt
* Generating loggdp_jt
* log(Y_jt): log GDP of the partner country
gen lognewgdp_67_B = ln(newgdp_67_B)

*** loggdp_jt - loggdp_jt-1
* Declare a dyad-year panel so that time-series operators can be used.
* The dyad identifier is the group number of the string "ccode1_ccode2".
tostring ccode1, gen(ccode1str)
tostring ccode2, gen(ccode2str)
gen ccode12str = ccode1str+"_"+ccode2str
egen ccode12_id = group(ccode12str)
xtset ccode12_id year

* First difference of partner log GDP. It is taken here, BEFORE the level
* series is overwritten with its lag below.
gen Dnewgdp_67_B = D.lognewgdp_67_B
* Dummy: 1 when the differenced series is available for this dyad-year
gen Dmarker = Dnewgdp_67_B != .

*** using LAG loggdp_jt instead of loggdp_jt
* From this point on lognewgdp_67_B holds the one-year lag of partner log GDP.
gen test = L.lognewgdp_67_B
replace lognewgdp_67_B = test
* Dummy: 1 when the (lagged) level series is available for this dyad-year
gen marker = lognewgdp_67_B != .



*** Trade-weighted partner-GDP instruments
*
* One nested loop replaces three copy-pasted loops that differed only in the
* trade flow used for the weights and in the suffix of the output variables.
*
*   flow  : MX (imports + exports), X (exports only), M (imports only)
*   i     : 7, 8, 9 -> weights averaged over the 1970s, 1980s, 1990s
*   spec  : log -> lognewgdp_67_B with availability dummy marker
*           D   -> Dnewgdp_67_B   with availability dummy Dmarker
*   grp   : All -> every partner; NoContig -> weights use only dyad-years
*           with cowcontig_f == 0
*
* Output variables, created in the same order as before:
*   yhat_log_X0s, yhat_log_NoContig_X0s, yhat_D_X0s, yhat_D_NoContig_X0s
* where X0s is 70s, 80s or 90s, with no suffix for MX and suffix _X or _M for
* the other two flows.
*
* The ratio variables thirdterm, thirdterm_NoContig, Dthirdterm and
* Dthirdterm_NoContig of the earlier version were never used and are no
* longer created.
foreach flow in MX X M {

	* Suffix of the output variables: none for MX, _X or _M otherwise
	local sfx "_`flow'"
	if "`flow'" == "MX" local sfx ""

	forvalues i = 7(1)9 {

		*** Weights based on the 19i0s
		* Dyad mean of flow_ijt/Y_it over the decade, using the years in which
		* data are available; max() then copies that value to every year of the dyad
		bysort ccode1 ccode2: egen w_ij_xx = mean(`flow'_67/newgdp_67_A) if year>=19`i'0 & year<=19`i'9
		bysort ccode1 ccode2: egen w_ij = max(w_ij_xx)
		bysort ccode1 ccode2: egen w_ij_NoContig_xx = mean(`flow'_67/newgdp_67_A) if year>=19`i'0 & year<=19`i'9 & cowcontig_f == 0
		bysort ccode1 ccode2: egen w_ij_NoContig = max(w_ij_NoContig_xx)
		drop w_ij_xx w_ij_NoContig_xx

		foreach spec in log D {

			* Partner GDP series and the dummy flagging where it is non-missing
			if "`spec'" == "log" {
				local gdp lognewgdp_67_B
				local avail marker
			}
			else {
				local gdp Dnewgdp_67_B
				local avail Dmarker
			}

			foreach grp in All NoContig {

				* c selects the weight variable: w_ij or w_ij_NoContig
				local c "_NoContig"
				if "`grp'" == "All" local c ""
				local tag "`spec'`c'"

				* First term: sum over partners of w_ij * GDP series.
				* The missing option leaves the sum missing when every product is missing.
				bysort ccode1 year: egen firstterm_`tag' = total(w_ij`c'*`gdp'), missing

				* Third term denominator: total w_ij over partners with GDP data
				* in year t, copied to every row of the country-year
				bysort ccode1 year: egen thirdterm_denom_xx_`tag' = total(w_ij`c') if `avail'==1
				bysort ccode1 year: egen thirdterm_denom_`tag' = max(thirdterm_denom_xx_`tag')

				* Third term numerator: largest yearly denominator of the country
				bysort ccode1: egen thirdterm_num_`tag' = max(thirdterm_denom_`tag')

				* Y_it^hat: weighted sum rescaled by numerator/denominator
				bysort ccode1 year: gen yhat_`tag'_`i'0s`sfx' = firstterm_`tag'*(thirdterm_num_`tag'/thirdterm_denom_`tag')
			}
		}

		* Clear the intermediates so the next decade can reuse the names
		drop *term* w_ij w_ij_NoContig
	}
}
*

*duplicates drop ccode1 year, force
*drop ccode2  M X expab eabo impab iabo tomz_gdp_67_B dtomz_gdp_67_B cpi_B gdp_wb_B newgdp_67_B pop_pwt_B rgdpch_B pop_madd_B pcgdp_B wtojoinB gattjoinB onegatt gattwto lat_2 lon_2 distcap2 contig dist distcap distw distwces mid_onset armconflict numb_mids conttype cowcontig MX MX_67 w_ij marker thirdterm_denom_xx thirdterm_denom thirdterm_num thirdterm democ_B autoc_B polity_B polity2_B
* Discard the raw trade inputs, partner-country (_B) covariates, distance and
* conflict variables, and the level-GDP availability dummy.
drop M X MX MX_67 expab eabo impab iabo ///
    tomz_gdp_67_B dtomz_gdp_67_B cpi_B gdp_wb_B newgdp_67_B ///
    pop_pwt_B rgdpch_B pop_madd_B pcgdp_B ///
    wtojoinB gattjoinB onegatt gattwto ///
    lat_2 lon_2 distcap2 dist distcap distw distwces ///
    mid_onset armconflict numb_mids conttype ///
    marker democ_B autoc_B polity_B polity2_B



***
* SEQUENCE
***

* Generating the sequence instrument
* Generating the sequence instruments
*
* For each weight flow (MX, X, M), each GDP specification (log, D) and each
* partner set (all partners, non-contiguous only):
*   - the sequence instrument takes the 1980s-weighted value, falls back on
*     the 1970s-weighted value when that is missing, and on the 1990s-weighted
*     value when both are missing;
*   - the companion source variable records which decade supplied the value
*     (1 = 80s, 2 = 70s, 3 = 90s, missing when none is available).
foreach flow in MX X M {

	* Suffix of the variable names: none for MX, _X or _M otherwise
	local sfx "_`flow'"
	if "`flow'" == "MX" local sfx ""

	foreach spec in log D {

		* The source variable of the differenced specification is prefixed with D
		local pfx ""
		if "`spec'" == "D" local pfx "D"

		foreach grp in All NoContig {

			local c "_NoContig"
			if "`grp'" == "All" local c ""

			local v70 yhat_`spec'`c'_70s`sfx'
			local v80 yhat_`spec'`c'_80s`sfx'
			local v90 yhat_`spec'`c'_90s`sfx'

			gen yhat_`spec'_sequence`c'`sfx' = cond(`v80'!=., `v80', cond(`v70'!=., `v70', `v90'))
			gen `pfx'source`c'`sfx' = cond(`v80'!=., 1, cond(`v70'!=., 2, cond(`v90'!=., 3, .)))
		}
	}
}



* Remove the decade-specific instruments, the availability dummies and the
* source bookkeeping variables; only the sequence instruments are carried on.
drop *70s* *80s* *90s* *marker* *source*

* Collapse the dyadic data to one row per ccode1-year. duplicates drop keeps
* the first row of each ccode1-year group in the current order of the data.
* NOTE(review): variables that still vary across ccode2 within a ccode1-year
* keep the value of whichever row is retained - confirm that none of them is
* used downstream.
duplicates drop ccode1 year, force

* Save the country-year instrument file in the older .dta format.
cd "C:\Users\Stephen Chaudoin\Desktop\Civil War Replication\"
saveold instrument_2014_06_20.dta, replace



************************************************
*** Section 2: UppsalaPRIO Setup
************************************************
* This subsection sets up the bdead_cy.dta file, which has the number of battle deaths for country-year combinations.

set more off
clear
set matsize 800

* Read the battle-deaths CSV without a header row; columns arrive as v1-v32.
cd "C:\Users\Stephen Chaudoin\Desktop\Civil War Replication\"
insheet using bdead3.csv, comma clear nonames

* Column names in file order: the k-th name is given to variable vk.
local colnames id year bdeadlow bdeadhig bdeadbes annualdata source bdversion ///
    location sidea sidea2nd sideb sideb2nd incomp terr intt cumint type ///
    startdate startprec startdate2 startprec2 epend ependdate ependprec ///
    gwnoa gwnoa2nd gwnob gwnob2nd gwnloc region version
local k = 0
foreach nm of local colnames {
	local ++k
	rename v`k' `nm'
}

* Keep conflict types 3 and 4 only
keep if inlist(type, 3, 4)

* The conflict-location code serves as the country identifier
rename gwnloc ccode1
sort ccode1 year

* bsdata: minimum of annualdata within each conflict id, then the minimum of
* that value within each country-year
bysort id: egen bsdata_total = min(annualdata)
bysort ccode1 year: egen bsdata = min(bsdata_total)

drop if ccode1==-99

* Treat -999 as missing, then split each estimate by conflict type: the
* type-specific copy is missing on rows of the other type
foreach v of varlist bdeadlow bdeadhig bdeadbes {
	replace `v' = . if `v' == -999
	foreach t of numlist 3 4 {
		gen `v'_`t' = `v' if type == `t'
	}
}
*
foreach t of numlist 3 4 {

	* noav: best estimate as reported
	gen bdeadbes_`t'_noav = bdeadbes_`t'

	* av: best estimate, replaced by the low/high midpoint when it is missing
	gen bdeadbes_`t'_av = cond(bdeadbes_`t' == ., (bdeadlow_`t'+bdeadhig_`t')/2, bdeadbes_`t')

	* miss: 1 on rows where the midpoint had to be substituted
	gen miss_`t' = bdeadbes_`t'_av != bdeadbes_`t'_noav

	* Rows of the other conflict type contribute zero deaths of this type
	replace bdeadbes_`t'_av = 0 if type != `t'
	replace bdeadbes_`t'_noav = 0 if type != `t'
}
*

* Country-year totals. The best-estimate totals use the missing option, so a
* country-year whose inputs are all missing stays missing. The low/high totals
* and the miss count do not use it.
foreach t of numlist 3 4 {
	bysort ccode1 year: egen bdeadbes_`t'_total_noav = total(bdeadbes_`t'_noav), missing
	bysort ccode1 year: egen bdeadbes_`t'_total_av = total(bdeadbes_`t'_av), missing
	bysort ccode1 year: egen miss_`t'_total = total(miss_`t')

	bysort ccode1 year: egen bdeadlow_`t'_total = total(bdeadlow_`t')
	bysort ccode1 year: egen bdeadhig_`t'_total = total(bdeadhig_`t')
}
*

* Combined type 3 + type 4 country-year totals
gen bdeadbes_34_av = bdeadbes_3_total_av + bdeadbes_4_total_av
gen bdeadbes_34_noav = bdeadbes_3_total_noav + bdeadbes_4_total_noav
gen miss_34_total = miss_3_total + miss_4_total

* Overwrite the row-level variables with their country-year totals
replace bdeadbes_3_av = bdeadbes_3_total_av
replace bdeadbes_3_noav = bdeadbes_3_total_noav
foreach t of numlist 3 4 {
	replace miss_`t' = miss_`t'_total
}
gen miss_34 = miss_34_total
foreach t of numlist 3 4 {
	replace bdeadlow_`t' = bdeadlow_`t'_total
	replace bdeadhig_`t' = bdeadhig_`t'_total
}

*
* Drop the helper totals and keep one row per country-year
drop *_total
duplicates drop ccode1 year, force

keep ccode1 year bsdata miss* ///
    bdeadbes_3_av bdeadbes_3_noav bdeadbes_34_av bdeadbes_34_noav ///
    bdeadlow_3 bdeadlow_4 bdeadhig_3 bdeadhig_4

* Code missing values as -999 so they remain distinguishable from the
* missings that a later merge creates for country-years absent from this file
recode bdeadbes_3_noav bdeadbes_3_av bdeadbes_34_noav bdeadbes_34_av ///
    bdeadlow_3 bdeadlow_4 bdeadhig_3 bdeadhig_4 (. = -999)


cd "C:\Users\Stephen Chaudoin\Desktop\Civil War Replication\"
saveold bdead_cy_2014_06_20.dta, replace


************************************************
*** Section 3: Merging the Instrument Data, UPPrio Data, and Fearon/Laitin Data
************************************************
cd "C:\Users\Stephen Chaudoin\Desktop\Civil War Replication\"
use instrument_2014_06_20.dta, clear

* Both files were reduced to one row per ccode1-year before they were saved,
* so this is a one-to-one match. merge 1:1 stops with an error if either file
* ever contains repeated ccode1-year keys, whereas the previous m:m merge
* would have silently paired such rows by their position within the group.
* t1 records the match status of every row.
merge 1:1 ccode1 year using bdead_cy_2014_06_20.dta, gen(t1)

* Battle deaths are coded as missing if there are not battle deaths that year.  This fixes that.
* Each value is recoded once, by the first rule that matches it:
*   missing -> 0        country-year absent from the battle-deaths file
*   -999    -> missing  country-year present there but with no usable figure
recode bdeadbes_3_noav bdeadbes_3_av bdeadbes_34_noav bdeadbes_34_av ///
    bdeadlow_3 bdeadlow_4 bdeadhig_3 bdeadhig_4 (. = 0) (-999 = .)
*

rename ccode1 ccode
* t2 records the match status against the replication data
merge 1:1 ccode year using repdata.dta, gen(t2)

* Assigning FL region codes to all years for a country
* (the largest non-missing region value of the country is used)
bysort ccode: egen region2=max(region)
* Generating a dummy variable if that country was ever in the FL sample:
* fldum is 1 when at least one row of the country matched repdata.dta (t2 == 3)
gen fldum=0
bysort ccode: egen fldum_xx=max(t2)
replace fldum=1 if fldum_xx==3
drop *_xx

cd "C:\Users\Stephen Chaudoin\Desktop\Civil War Replication\"
saveold workingugly_2014_06_20.dta, replace

************************************************
*** Section 4: Cosmetics
************************************************
cd "C:\Users\Stephen Chaudoin\Desktop\Civil War Replication\"
use workingugly_2014_06_20.dta, clear

* y_it is log(gdp) - log(pop)
* NOTE(review): the factor 1000 presumably converts pop_A from thousands to
* persons - confirm against the data documentation.
gen y_it = log(newgdp_67_A)-log(pop_A*1000)

* Drop the merge indicators and every variable with _B in its name
drop t1 t2
drop *_B*

* Year dummies (YR*) and country dummies (CFE*)
quietly tab year, gen(YR)
quietly tab ccode, gen(CFE)
* tab leaves the number of distinct ccode values in r(r). Using it as the loop
* bound creates one trend per country dummy; the earlier fixed bound of 214
* failed with fewer countries and skipped trends with more.
local nCountries = r(r)
forvalues i = 1/`nCountries' {
	* Linear trend, equal to year - 1960 for country i and 0 for all others
	gen CSTT`i' = CFE`i'*(year-1960)
	label variable CSTT`i' "Country-Specific Time Trend for Iccode`i'"
	}
*

xtset ccode year

*gen yhat_growth = (yhat-l.yhat)/l.yhat
*gen lyhat_growth = l.yhat_growth
*gen y_it_diff = y_it-l.y_it

keep if year>1959 & year<2009

* Adding MSS instrument, cleaning up some variable names, and then ...

* Attach the rainfall data; mergeRain records the match status
sort ccode year 
merge 1:1 ccode year using MSS_rainfall.dta, gen(mergeRain)

* peace: 1 in a year with zero battle deaths that follows a year with a
* positive, non-missing number of battle deaths
gen peace = (L.bdeadbes_3_av > 0) & !missing(L.bdeadbes_3_av) & (bdeadbes_3_av == 0)
gen lagLogAv = ln(L.bdeadbes_3_av)
gen logAv = ln(bdeadbes_3_av)

* War indicator: 1 with positive battle deaths, 0 with none, missing if unknown
gen warFromBDead = cond(missing(bdeadbes_3_av), ., bdeadbes_3_av > 0)

gen lagLogNoAv = ln(L.bdeadbes_3_noav)
gen logNoAv = ln(bdeadbes_3_noav)

label var lagLogAv "Log Battle Deaths at t-1"
label var logAv "Log Battle Deaths at t"

** Uses the no-contiguity instrument
gen yhat_log = yhat_log_sequence_NoContig_X
gen yhat_AR = yhat_log_sequence_NoContig

*** DV, onset from battle deaths
* 0 in years without war; in war years 1 if the previous row of the same
* country was at peace and 0 if it was at war; missing otherwise
sort ccode year
by ccode: gen onsetFromBDead = cond(warFromBDead == 0, 0, cond(warFromBDead == 1 & warFromBDead[_n-1] == 0, 1, cond(warFromBDead == 1 & warFromBDead[_n-1] == 1, 0, .)))

* One-year lags, named with an l prefix
local tolag bdeadbes_3_av bdeadbes_3_noav bdeadbes_34_av bdeadbes_34_noav war warFromBDead onset onsetFromBDead y_it yhat_log
foreach v of local tolag {
	gen l`v' = L.`v'
}
*
* Second and third lags, each built as the lag of the previous lag
gen l2bdeadbes_3_av = L.lbdeadbes_3_av
gen l3bdeadbes_3_av = L.l2bdeadbes_3_av

* Year of conflict
* Starts as a 0/1 war flag, then counts up along consecutive war years
sort ccode year
by ccode: gen conflictyear = warFromBDead==1
by ccode: replace conflictyear = conflictyear[_n-1]+1 if conflictyear==1 & L.conflictyear!=0 & L.conflictyear!=.
quietly tab conflictyear, gen(conflyear_)

* Identify spells of consecutive conflict years. tsspell is a user-written
* command that must be installed; the next line uses its _seq and _spell
* variables to give each spell its length.
tsspell, c(conflictyear>=1)
egen conflictlength = max(_seq), by(ccode _spell)



cd "C:\Users\Stephen Chaudoin\Desktop\Civil War Replication\"
saveold working_2014_06_30.dta, replace


